# ------------------------------------------------------------------------------
# Reports stats on natural experiment to use in-text
# Author: Cassidy Shubatt <cshubatt@gmail.com>
# To run: bash 07_natexp_stats.sh
# ------------------------------------------------------------------------------

# Libraries --------------------------------------------------------------------
library(here)
library(yaml)
library(data.table)
library(tidyverse)
library(testit) # assert()
library(glue) # glue strings
library(OneR) # bin

temp <- here::here("code", "05_natural_experiment", "temp")
overnight_lab <- ""
u <- modules::use(here::here("lib", "util.R"))

# Load Data --------------------------------------------------------------------
message("Loading data...")
paths <- read_yaml(here::here("lib", "filepaths.yml"))
longterm_outcomes <- readRDS(paths$analysis$longterm_outcomes)
cohort <- readRDS(file.path(temp, "shift_test_rates.rds")) %>%
  setnames("p__ensemble__stent_or_cabg_010_day__tested", "yhat") %>%
  mutate(
    tile_leaveout_trate_005 = bin(
      shift_12_trate_leaveout, nbins = 5, labels = 1:5,
      method = "content"
    )
  ) %>%
  filter(split != "val")

# Find pct of yhat variation due to shift test rate
message("Getting risk variation from shift test rates...")
fit <- lm(test_010_day ~ yhat, data = cohort)
beta <- coef(fit)["yhat"]

t5 <- filter(cohort, tile_leaveout_trate_005 == 5)
t1 <- filter(cohort, tile_leaveout_trate_005 == 1)

yhat_diff <- mean(t5$yhat) - mean(t1$yhat)
tbar_diff <- mean(t5$shift_12_trate_leaveout) - mean(t1$shift_12_trate_leaveout)

pct_var <- beta*yhat_diff/tbar_diff
message("Pct of yhat variation from shift testing differences: ", pct_var)

# disp yhat by shift testing tile
summary <- cohort %>%
  group_by(tile_leaveout_trate_005) %>%
  summarize(
    n = n(),
    tbar = mean(shift_12_trate_leaveout),
    mean_yhat = mean(yhat)
  ) %>%
  ungroup

print(summary)
